Plot to compare number of over 65s per 1000 population receiving social care.
Based on the 2012 Social Care Survey Open Data. The official report plots the number per thousand population receiving home care, but for the whole population - let’s concentrate on over 65s.
# Load the cleaned version of the 2012 social care survey. It was created in
# "rmds/import_and_tidy.Rmd" (rendered report: "reports/01-import_and_tidy.html").
# NOTE(review): the file carries an ".rds" extension but is read with load(),
# so it is presumably a save()-format file, not one written by saveRDS().
load("produced_data/created_objects/soc_care12.rds")
library(readr)
library(dplyr)
library(tidyr)
library(purrr)
library(forcats)
library(stringr)
library(ggplot2)
library(ggthemes)
library(extrafont)
# Default ggplot2 theme applied to the plots in this document
theme_set(theme_tufte(base_size = 12))

# Colour palette (18 hex colours); plots below pick entries by position
ubdc_palette <- c(
  "#13AFD6", "#E6E600", "#F07329", "#35B14E", "#D7509A", "#2165AF",
  "#BCD032", "#866BAC", "#545A5D", "#7A8082", "#E2D988", "#628DB7",
  "#929B9A", "#93B8DA", "#31649B", "#FBF8D0", "#ACB2B4", "#D1DAE2"
)
The first thing is to look at the overall number of records returned for each local authority area.
# Per-council record counts for clients outside the "18-<65" age group,
# together with each council's share of the overall count (as a fraction and
# as a percentage rounded to one decimal place).

# All social care clients
total_summary_2012 <- soc_care12 %>%
  filter(age_grp != "18-<65") %>% # drop those under 65
  group_by(council) %>%
  summarise(N_total = n()) %>%
  mutate(freq_total = N_total / sum(N_total)) %>%
  mutate(pct_total = round(freq_total * 100, 1))

# Home care clients only
home_care_summary_2012 <- soc_care12 %>%
  filter(age_grp != "18-<65", hc_client == "Yes") %>%
  group_by(council) %>%
  summarise(N_homecare = n()) %>%
  mutate(freq_homecare = N_homecare / sum(N_homecare)) %>%
  mutate(pct_homecare = round(freq_homecare * 100, 1))

# Non-home care clients only (telecare or other services)
telecare_summary_2012 <- soc_care12 %>%
  filter(age_grp != "18-<65", hc_client == "No") %>%
  group_by(council) %>%
  summarise(N_telecare = n()) %>%
  mutate(freq_telecare = N_telecare / sum(N_telecare)) %>%
  mutate(pct_telecare = round(freq_telecare * 100, 1))

# Combine the three summaries into one table, one row per council
summary_2012_table <- total_summary_2012 %>%
  left_join(home_care_summary_2012) %>%
  left_join(telecare_summary_2012)
## Joining, by = "council"
## Joining, by = "council"
# Print the combined per-council summary table for inspection
summary_2012_table
## # A tibble: 32 x 10
## council N_total freq_total pct_total N_homecare
## <fctr> <int> <dbl> <dbl> <int>
## 1 Aberdeen City 2969 0.0256 2.6 1784
## 2 Aberdeenshire 3308 0.0285 2.9 1828
## 3 Angus 3643 0.0314 3.1 1326
## 4 Argyll & Bute 1774 0.0153 1.5 836
## 5 Clackmannanshire 1317 0.0113 1.1 546
## 6 Dumfries & Galloway 3171 0.0273 2.7 1993
## 7 Dundee City 4991 0.0430 4.3 1614
## 8 East Ayrshire 3086 0.0266 2.7 1632
## 9 East Dunbartonshire 2208 0.0190 1.9 963
## 10 East Lothian 1353 0.0117 1.2 1175
## # ... with 22 more rows, and 5 more variables: freq_homecare <dbl>,
## # pct_homecare <dbl>, N_telecare <int>, freq_telecare <dbl>,
## # pct_telecare <dbl>
# The intermediate summaries are no longer needed once joined
rm(home_care_summary_2012, total_summary_2012, telecare_summary_2012)
Now we can plot these
library(plotly)
# Interactive dot plot of the number of social care client records per council,
# ordered by count. Hovering over a point shows that council's percentage of
# the overall total.
sc_count <-
  ggplot(
    summary_2012_table,
    aes(
      x = reorder(council, -N_total),
      y = N_total,
      text = paste0(pct_total, "%") # extra aesthetic picked up by ggplotly()
    )
  ) +
  geom_point(color = ubdc_palette[1], size = 3) +
  scale_y_continuous(
    breaks = c(1000, 2000, 3000, 4000, 5000, 6000, 7500, 10000, 15000)
  ) +
  theme(
    plot.subtitle = element_text(colour = "#666666"),
    # colour must be named: the first positional argument of element_text()
    # is `family`, so a bare "#AAAAAA" would be read as a font family
    plot.caption = element_text(colour = "#AAAAAA", size = 10),
    panel.grid.major.y = element_line(colour = "#f2f2f2")
  ) +
  labs(
    title = "Count of Social Care Clients",
    subtitle = "With percentage of total",
    x = "",
    y = "Number of Clients",
    caption = "2012 Social Care Survey"
  ) +
  coord_flip()

# `tooltip` selects aesthetics by name; the percentage label is mapped to `text`
ggplotly(sc_count, tooltip = "text")
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
And repeat for only home care clients
# Dot plot of the number of home care client records per council, ordered by
# count and labelled with each council's percentage of all home care records.
ggplot(
  summary_2012_table,
  aes(x = reorder(council, -N_homecare), y = N_homecare)
) +
  geom_point(color = ubdc_palette[3], size = 3) +
  geom_text(
    aes(label = paste0(pct_homecare, "%")),
    size = 3, hjust = -0.3, vjust = -0.2
  ) +
  scale_y_continuous(
    breaks = c(1000, 2000, 3000, 4000, 5000, 6000, 7500, 10000, 15000)
  ) +
  theme(
    plot.subtitle = element_text(colour = "#666666"),
    # colour must be named: the first positional argument of element_text()
    # is `family`, so a bare "#AAAAAA" would be read as a font family
    plot.caption = element_text(colour = "#AAAAAA", size = 10),
    panel.grid.major.y = element_line(colour = "#f2f2f2")
  ) +
  labs(
    title = "Count of Home Care Clients",
    subtitle = "With percentage of total",
    x = "",
    y = "Number of home care clients",
    caption = "2012 Social Care Survey"
  ) +
  coord_flip()
And non-home care clients i.e. telecare or other services only
# Dot plot of the number of non-home care client records (telecare or other
# services only) per council, ordered by count and labelled with each
# council's percentage of all such records.
ggplot(
  summary_2012_table,
  aes(x = reorder(council, -N_telecare), y = N_telecare)
) +
  geom_point(color = ubdc_palette[4], size = 3) +
  geom_text(
    aes(label = paste0(pct_telecare, "%")),
    size = 3, hjust = -0.3, vjust = -0.2
  ) +
  scale_y_continuous(
    breaks = c(1000, 2000, 3000, 4000, 5000, 6000, 7500, 10000, 15000)
  ) +
  theme(
    plot.subtitle = element_text(colour = "#666666"),
    # colour must be named: the first positional argument of element_text()
    # is `family`, so a bare "#AAAAAA" would be read as a font family
    plot.caption = element_text(colour = "#AAAAAA", size = 10),
    panel.grid.major.y = element_line(colour = "#f2f2f2")
  ) +
  labs(
    title = "Count of telecare and other services clients",
    subtitle = "With percentage of total",
    x = "",
    y = "",
    caption = "2012 Social Care Survey"
  ) +
  coord_flip()
To calculate proportions of over 65s getting care in each local authority we need the total number of over 65s in that LA.
For this I am going to use the 2011 Population estimates downloaded from the National Records of Scotland here:- https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/population/2011-census-reconciliation-report/list-of-tables
This data needs to be imported and cleaned.
I’m going to create pop_data_combined: a data_frame with population sizes of each Local Authority as a total and for over 65s
# Note: the commas used as thousands separators were removed from the raw file
# in Excel beforehand, so the counts can be coerced to numeric here.

# Import and tidy the raw data
pop_data <- read_csv("raw_data/2011-cen-rec-report-pop-tab1b.csv", skip = 3) %>%
  slice(1:34) %>%
  filter(!(X1 %in% c("Council areas", "SCOTLAND"))) %>% # drop header/total rows
  select(-X3, -X23, -X24) %>%
  map_at(2:21, as.numeric) %>% # coerce from character to numeric
  as_data_frame() %>%
  # rename by exact column name; a pattern replace on names() would also
  # rewrite any other column whose name merely contains "X1" or "X2"
  rename(council = X1, total = X2)

# Long format: one row per council and age group ("total" is kept as a group)
pop_data_2011 <- pop_data %>%
  gather(age_grp, value, 2:21) %>%
  arrange(council)

# Total population per council
pop_data_total <- pop_data_2011 %>%
  filter(age_grp == "total") %>%
  select(council, pop_total = value) %>%
  arrange(council)

# Population aged 65 and over per council: sum the six oldest age bands.
# summarise() returns one ungrouped row per council directly.
pop_data_65plus <- pop_data_2011 %>%
  filter(
    age_grp %in% c("65 - 69", "70 - 74", "75 - 79", "80 - 84", "85 - 89", "90+")
  ) %>%
  group_by(council) %>%
  summarise(pop_over_65 = sum(value))

# Join the two population tables on council
pop_data_combined_2011 <-
  full_join(pop_data_total, pop_data_65plus, by = "council")
pop_data_combined_2011
## # A tibble: 32 x 3
## council pop_total pop_over_65
## <chr> <dbl> <dbl>
## 1 Aberdeen City 222793 32031
## 2 Aberdeenshire 252973 40663
## 3 Angus 115978 23061
## 4 Argyll & Bute 88166 19336
## 5 Clackmannanshire 51442 8212
## 6 Dumfries & Galloway 151324 33050
## 7 Dundee City 147268 24597
## 8 East Ayrshire 122767 21307
## 9 East Dunbartonshire 105026 20428
## 10 East Lothian 99717 17763
## # ... with 22 more rows
# Drop the intermediate population tables; only the combined one is kept
rm(pop_data_65plus, pop_data_total, pop_data)
Finally, I’m going to calculate the per thousand proportions.
# Rate of over-65 clients per thousand over-65 population, per council, for
# any care, home care, and telecare/other services only.
per_thousand_table <-
  summary_2012_table %>%
  mutate(
    # summary_2012_table carries no population column, so look the over-65
    # population up by council name in the 2011 population table.
    # NOTE(review): assumes council names are spelled identically in both
    # tables; a council without a match gets NA rates - confirm.
    pop_over_65 = pop_data_combined_2011$pop_over_65[
      match(as.character(council), pop_data_combined_2011$council)
    ],
    any_care_per_thousand = round((N_total / pop_over_65) * 1000, 1),
    homecare_per_thousand = round((N_homecare / pop_over_65) * 1000, 1),
    telecare_per_thousand = round((N_telecare / pop_over_65) * 1000, 1)
  ) %>%
  select(council, any_care_per_thousand, homecare_per_thousand, telecare_per_thousand)
per_thousand_table
## # A tibble: 32 x 4
## council any_care_per_thousand homecare_per_thousand
## <fctr> <dbl> <dbl>
## 1 Aberdeen City 92.7 55.7
## 2 Aberdeenshire 81.4 45.0
## 3 Angus 158.0 57.5
## 4 Argyll & Bute 91.7 43.2
## 5 Clackmannanshire 160.4 66.5
## 6 Dumfries & Galloway 95.9 60.3
## 7 Dundee City 202.9 65.6
## 8 East Ayrshire 144.8 76.6
## 9 East Dunbartonshire 108.1 47.1
## 10 East Lothian 76.2 66.1
## # ... with 22 more rows, and 1 more variables: telecare_per_thousand <dbl>
I’m going to save the objects created so far.
# Write each table to its own file with save(); each file restores its object
# under the original name when read back with load().
save(
  list = "summary_2012_table",
  file = "produced_data/created_objects/summary_2012_table.rds"
)
save(
  list = "pop_data_2011",
  file = "produced_data/created_objects/pop_data_2011.rds"
)
save(
  list = "pop_data_combined_2011",
  file = "produced_data/created_objects/pop_data_combined_2011.rds"
)
save(
  list = "per_thousand_table",
  file = "produced_data/created_objects/per_thousand_table.rds"
)
# Horizontal bar chart of the home care rate per thousand over 65s for each
# council, ordered by rate. Highland is left out (see caption).
homecare_plot <-
  ggplot(
    data = filter(per_thousand_table, council != "Highland"),
    mapping = aes(
      x = reorder(council, -homecare_per_thousand),
      y = homecare_per_thousand
    )
  ) +
  geom_col(fill = "chartreuse4") +
  labs(
    title = "Figure 1: Number of over65s per thousand receiving home care",
    subtitle = "by Local Authority",
    x = "",
    y = "Number per thousand over 65s",
    caption = "2012 Social Care Survey, population estimates from NRS 2011 census estimate
Highland council removed due to large amount of missing data"
  ) +
  # complete theme first, element-level tweaks afterwards
  theme_minimal(base_size = 9, base_family = "Roboto") +
  theme(
    plot.subtitle = element_text(color = "#666666"),
    plot.title = element_text(family = "Roboto Condensed", face = "bold"),
    plot.caption = element_text(color = "#AAAAAA", size = 8),
    axis.text.y = element_text(size = 8),
    axis.text.x = element_text(size = 10, face = "bold")
  ) +
  coord_flip()

homecare_plot
Save this plot (for UBDC blog!)
# Write the bar chart to disk as a 6 x 6 PNG
ggsave(
  filename = "plots/over65s_homecare.png",
  plot = homecare_plot,
  width = 6,
  height = 6
)
# Horizontal bar chart of the telecare/other-services-only rate per thousand
# over 65s for each council, ordered by rate. Highland is left out (see caption).
per_thousand_table %>%
  filter(council != "Highland") %>%
  ggplot(
    .,
    aes(
      x = reorder(council, -telecare_per_thousand),
      y = telecare_per_thousand
    )
  ) +
  geom_col(fill = "chartreuse4") +
  # theme_hc() is a complete theme and replaces any theme settings added
  # before it, so it must come first and the axis-text tweaks after it
  theme_hc() +
  theme(
    axis.text.y = element_text(size = 14, face = "bold"),
    axis.text.x = element_text(size = 14, face = "bold")
  ) +
  labs(
    title = "Number of over65s per thousand receiving telecare or other services only",
    subtitle = "by Local Authority",
    x = "",
    y = "Number per thousand over 65s",
    caption = "2012 Social Care Survey, population estimates from NRS 2011 census estimate
Highland council removed due to large amount of missing data"
  ) +
  coord_flip()
Going to try a Cleveland dotplot.
# Cleveland-style dot plot of the home care rate per 1000 over 65s for each
# council, ordered by rate. Highland is left out (see caption).
homecare_plot_clevland <-
  ggplot(
    data = filter(per_thousand_table, council != "Highland"),
    mapping = aes(
      x = reorder(council, -homecare_per_thousand),
      y = homecare_per_thousand
    )
  ) +
  geom_point(size = 1, color = ubdc_palette[1]) +
  labs(
    title = "Figure 1: Standardised rate per 1000 over-65\n receiving home care",
    subtitle = "by Local Authority",
    x = "",
    y = "Standardised rate per 1000 65+",
    caption = "2012 Social Care Survey \nPopulation estimates from NRS 2011 census estimate\nHighland council removed due to large amount of missing data"
  ) +
  theme(
    text = element_text(size = 6),
    axis.text.x = element_text(face = "bold"),
    panel.grid.major = element_line(colour = "#e6e6e6"),
    plot.title = element_text(hjust = 0),
    plot.caption = element_text(colour = "#AAAAAA"),
    plot.subtitle = element_text(colour = "#666666")
  ) +
  coord_flip()

homecare_plot_clevland

# Write the dot plot to disk as a 4 x 3 PNG
ggsave(
  filename = "plots/over65s_homecare_clevland.png",
  plot = homecare_plot_clevland,
  width = 4,
  height = 3
)